{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## BONUS [Not in Chapter]: bi-gram, tri-gram and quad-gram phrase detection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**NOTE**: This notebook depends upon the Retrotech dataset. If you have any issues, please rerun the [Setting up the Retrotech Dataset](../ch04/1.setting-up-the-retrotech-dataset.ipynb) notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
      "[nltk_data]     /home/jovyan/nltk_data...\n",
      "[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.\n"
     ]
    }
   ],
   "source": [
    "import nltk\n",
    "from nltk.collocations import *\n",
    "import re\n",
    "import pandas \n",
    "from nltk.corpus import webtext\n",
    "from nltk.tokenize import RegexpTokenizer\n",
    "nltk.download('averaged_perceptron_tagger')\n",
    "from pyspark.sql import SparkSession\n",
    "spark = SparkSession.builder.appName(\"aips-ch6\").getOrCreate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_2495/2060178846.py:3: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  signal_all = pandas.read_csv(\"data/retrotech/signals.csv\")\n"
     ]
    }
   ],
   "source": [
    "# Load every raw Retrotech signal. low_memory=False makes pandas infer each column's dtype from\n",
    "# the whole file at once, which avoids the mixed-type DtypeWarning reported for column 3.\n",
    "signal_all = pandas.read_csv(\"../../data/retrotech/signals.csv\", low_memory=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Boolean mask selecting the rows whose signal type is a search query.\n",
    "is_query = signal_all['type'].eq('query')\n",
    "signal_query = signal_all.loc[is_query]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>query_id</th>\n",
       "      <th>user</th>\n",
       "      <th>type</th>\n",
       "      <th>target</th>\n",
       "      <th>signal_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>u2_1_2</td>\n",
       "      <td>u2</td>\n",
       "      <td>query</td>\n",
       "      <td>rca</td>\n",
       "      <td>2020-05-04 08:28:21.1848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>u3_0_1</td>\n",
       "      <td>u3</td>\n",
       "      <td>query</td>\n",
       "      <td>macbook</td>\n",
       "      <td>2019-12-22 00:07:07.0152</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  query_id user   type   target               signal_time\n",
       "1   u2_1_2   u2  query      rca  2020-05-04 08:28:21.1848\n",
       "2   u3_0_1   u3  query  macbook  2019-12-22 00:07:07.0152"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "signal_query.iloc[1:3]  # preview the second and third query rows (positional slice)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the real signals: load the Solr collection and register it as a Spark SQL view.\n",
    "signals_collection = \"signals\"\n",
    "signals_opts = dict(zkhost=\"aips-zk\", collection=signals_collection)\n",
    "df = spark.read.format(\"solr\").options(**signals_opts).load()\n",
    "df.createOrReplaceTempView(\"signals\")\n",
    "\n",
    "# Build the user-searches table: each row represents one search query.\n",
    "# Grouping by keyword and user keeps only one signal per user per keyword to prevent spam.\n",
    "user_searches_sql = \"\"\"\n",
    "  SELECT lower(trim(searches.target)) as keyword, searches.user as user \n",
    "  FROM signals as searches where searches.type='query'\n",
    "  GROUP BY keyword, user\"\"\"\n",
    "query_signals = spark.sql(user_searches_sql).collect()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 1: data cleaning: \n",
    "\n",
    "Tokenize each query into word tokens, then keep only the tokens that are longer than 2 characters and are not made up solely of digits."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['beats', 'dre'],\n",
       " ['beats', 'dre', 'headphones'],\n",
       " ['epad'],\n",
       " ['fringe'],\n",
       " ['gps', 'tracker']]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "_WORD_TOKENIZER = RegexpTokenizer(r'\\w+')  # built once and reused for every query\n",
    "\n",
    "\n",
    "def cleaning(text):\n",
    "    \"\"\"Tokenize a query into the lowercase word tokens worth keeping.\n",
    "\n",
    "    The text is lowercased and split into runs of word characters. Tokens of\n",
    "    one or two characters and tokens made up only of digits are dropped.\n",
    "\n",
    "    Args:\n",
    "        text: The raw query string. None or an empty string yields no tokens.\n",
    "\n",
    "    Returns:\n",
    "        A list with the kept tokens, in their original order.\n",
    "    \"\"\"\n",
    "    if not text:  # a missing keyword would otherwise fail on .lower()\n",
    "        return []\n",
    "\n",
    "    return [\n",
    "        token\n",
    "        for token in _WORD_TOKENIZER.tokenize(text.lower())\n",
    "        if len(token) > 2 and not token.isdigit()\n",
    "    ]\n",
    "\n",
    "# Tokenize the keyword of every (keyword, user) row returned by the signals query.\n",
    "signal_tokened = [cleaning(row[\"keyword\"]) for row in query_signals]\n",
    "\n",
    "signal_tokened[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2: Find candidate bi-gram, tri-gram and quad-gram phrases based on frequency. \n",
    "\n",
    "Use the NLTK collocation finders to collect candidate bi-gram, tri-gram and quad-gram phrases. A frequency filter is then applied to keep only the n-grams with a frequency greater than or equal to 3. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimum number of occurrences an n-gram needs in order to stay a candidate phrase.\n",
    "freq_threshold = 3\n",
    "\n",
    "# Each finder is built from the list of per-query token lists.\n",
    "# The association measures used for scoring are created in the next cell.\n",
    "finder_bi = BigramCollocationFinder.from_documents(signal_tokened)\n",
    "finder_tri = TrigramCollocationFinder.from_documents(signal_tokened)\n",
    "finder_quad = QuadgramCollocationFinder.from_documents(signal_tokened)\n",
    "\n",
    "# Apply the same frequency filter to the bi-gram, tri-gram and quad-gram finders.\n",
    "for finder in (finder_bi, finder_tri, finder_quad):\n",
    "    finder.apply_freq_filter(freq_threshold)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One association-measure object per n-gram size (bi-gram, tri-gram, quad-gram).\n",
    "bigram_measures, trigram_measures, quadgram_measures = (\n",
    "    nltk.collocations.BigramAssocMeasures(),\n",
    "    nltk.collocations.TrigramAssocMeasures(),\n",
    "    nltk.collocations.QuadgramAssocMeasures(),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3: Sort candidate phrases based on PMI and likelihood ratio. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bigrams: [(('stanley', 'kubrick'), 18.078850517367755), (('viva', 'bam'), 18.078850517367755), (('anthony', 'hamilton'), 17.756922422480393), (('bangkok', 'knockout'), 17.756922422480393), (('barbara', 'streisand'), 17.756922422480393), (('bypass', 'module'), 17.756922422480393), (('cadillac', 'records'), 17.756922422480393), (('charred', 'walls'), 17.756922422480393), (('conditions', 'parole'), 17.756922422480393), (('daddy', 'yankee'), 17.756922422480393), (('darkwing', 'duck'), 17.756922422480393), (('dierks', 'bentley'), 17.756922422480393), (('drawn', 'together'), 17.756922422480393), (('due', 'date'), 17.756922422480393), (('dukes', 'hazzard'), 17.756922422480393), (('edward', 'scissorhands'), 17.756922422480393), (('ergo', 'proxy'), 17.756922422480393), (('fleet', 'foxes'), 17.756922422480393), (('greatest', 'hits'), 17.756922422480393), (('gurren', 'lagann'), 17.756922422480393)]\n",
      "Trigrams: [(('airborne', 'toxic', 'event'), 33.64990639453681), (('everybody', 'loves', 'raymond'), 33.350346112677904), (('quinn', 'medicine', 'woman'), 33.13533322170706), (('where', 'art', 'thou'), 32.909773521291925), (('smash', 'bros', 'brawl'), 32.7913788204897), (('framed', 'roger', 'rabbit'), 32.66584793840583), (('armin', 'van', 'buuren'), 32.45726131659442), (('forgetting', 'sarah', 'marshall'), 32.250810439126994), (('why', 'did', 'get'), 32.250810439126994), (('jedi', 'mind', 'tricks'), 32.20933380315083), (('ground', 'loop', 'isolator'), 32.15770103473551), (('insane', 'clown', 'posse'), 32.15770103473551), (('keep', 'from', 'crying'), 32.15770103473551), (('laugh', 'keep', 'from'), 32.15770103473551), (('dreams', 'may', 'come'), 32.11330691537705), (('what', 'dreams', 'may'), 32.11330691537705), (('ernest', 'scared', 'stupid'), 31.92888234423963), (('aqua', 'teen', 'hunger'), 31.872298815873265), (('battle', 'los', 'angeles'), 31.872298815873258), (('bush', 'sea', 'memories'), 31.73984851984961)]\n",
      "Quadgrams: [(('laugh', 'keep', 'from', 'crying'), 49.236551552103265), (('when', 'fish', 'ride', 'bicycles'), 49.235143357710456), (('what', 'dreams', 'may', 'come'), 49.19215743274481), (('why', 'did', 'get', 'married'), 47.32966095649475), (('brother', 'where', 'art', 'thou'), 46.684843290482576), (('who', 'framed', 'roger', 'rabbit'), 46.36965902442666), (('aqua', 'teen', 'hunger', 'force'), 46.33643948912581), (('wicked', 'this', 'way', 'comes'), 46.09940029182496), (('super', 'smash', 'bros', 'brawl'), 46.024739286913075), (('teenage', 'mutant', 'ninja', 'turtles'), 45.008973210697015), (('scooby', 'doo', 'where', 'are'), 44.569717307401774), (('not', 'that', 'into', 'you'), 44.08398941699153), (('doo', 'where', 'are', 'you'), 43.629423553526046), (('music', 'better', 'than', 'words'), 43.18503457565429), (('something', 'wicked', 'this', 'way'), 42.37693426735387), (('hes', 'just', 'not', 'that'), 42.24206552069067), (('jeff', 'dunham', 'controlled', 'chaos'), 42.21331699525727), (('planes', 'trains', 'and', 'automobiles'), 42.04724818065044), (('taylor', 'swift', 'speak', 'now'), 42.022334899077734), (('clap', 'your', 'hands', 'say'), 41.78658078936057), (('your', 'hands', 'say', 'yeah'), 41.78658078936057), (('fear', 'and', 'loathing', 'las'), 40.6451497370791), (('ace', 'combat', 'assault', 'horizon'), 40.61876037624317), (('just', 'not', 'that', 'into'), 40.57910050796824), (('charred', 'walls', 'the', 'damned'), 40.43358088228214), (('two', 'door', 'cinema', 'club'), 40.053494807176264), (('and', 'loathing', 'las', 'vegas'), 39.6451497370791), (('tori', 'amos', 'night', 'hunters'), 39.51087908421934), (('peter', 'gabriel', 'new', 'blood'), 39.38648017162541), (('death', 'cab', 'for', 'cutie'), 38.95894630872415), (('stray', 'from', 'the', 'path'), 38.5855839757272), (('alice', 'cooper', 'welcome', 'nightmare'), 38.53598709460555), (('law', 'and', 'order', 'svu'), 38.481394502876995), (('tales', 'from', 'the', 'crypt'), 38.43358088228214), (('the', 
'count', 'monte', 'cristo'), 38.32924422246741), (('breville', 'juice', 'fountain', 'plus'), 38.20874272307279), (('fast', 'times', 'ridgemont', 'high'), 38.13494834067996), (('black', 'ops', 'pre', 'owned'), 37.93359139885487), (('east', 'bound', 'and', 'down'), 37.87609691900606), (('seven', 'brides', 'for', 'seven'), 37.683749264181785), (('its', 'always', 'sunny', 'philedelphia'), 37.63472636773743), (('its', 'always', 'sunny', 'philidelphia'), 37.63472636773743), (('for', 'speed', 'most', 'wanted'), 37.591010971056875), (('its', 'always', 'sunny', 'philadelphia'), 37.442081289795034), (('big', 'sean', 'finally', 'famous'), 37.04805687638127), (('get', 'him', 'the', 'greek'), 37.01067513966997), (('fresh', 'prince', 'bel', 'air'), 36.66833377138181), (('theft', 'auto', 'san', 'andreas'), 36.402585581314234), (('beautiful', 'dark', 'twisted', 'fantasy'), 36.09471050292185), (('brides', 'for', 'seven', 'brothers'), 35.89947795523722), (('legend', 'zelda', 'skyward', 'sword'), 35.55805220444397), (('skullcandy', 'full', 'metal', 'jacket'), 35.51150478293588), (('how', 'met', 'your', 'mother'), 35.50187234066841), (('madeas', 'big', 'happy', 'family'), 35.4445129499184), (('once', 'upon', 'time', 'the'), 35.38918676292369), (('nothing', 'but', 'the', 'beat'), 35.29710236393297), (('dragon', 'ball', 'ultimate', 'tenkaichi'), 35.180341360381945), (('rocky', 'horror', 'picture', 'show'), 34.799355379084076), (('keeping', 'with', 'the', 'kardashians'), 34.76080198191141), (('spartacus', 'gods', 'the', 'arena'), 34.56475832750715), (('mophie', 'juice', 'pack', 'plus'), 34.4180451449688), (('the', 'red', 'jumpsuit', 'apparatus'), 34.31204736494212), (('how', 'meet', 'your', 'mother'), 34.30922726272601), (('red', 'hot', 'chilli', 'peppers'), 34.26122116469831), (('saints', 'row', 'the', 'third'), 34.25865519978147), (('red', 'hot', 'chili', 'peppers'), 34.16546218189288), (('people', 'under', 'the', 'stairs'), 34.107825709283304), (('how', 'train', 'your', 'dragon'), 
34.00081216345952), (('spider', 'man', 'edge', 'time'), 33.8404939748012), (('need', 'for', 'speed', 'carbon'), 33.815364908726615), (('need', 'for', 'speed', 'undercover'), 33.815364908726615), (('need', 'for', 'speed', 'most'), 33.81536490872661), (('buffy', 'the', 'vampire', 'slayer'), 33.75550897716951), (('five', 'finger', 'death', 'punch'), 33.727938413334215), (('hot', 'tub', 'time', 'machine'), 33.572489092067315), (('the', 'devil', 'wears', 'prada'), 33.40137617193162), (('alvin', 'and', 'the', 'chipmunks'), 33.3735605277743), (('gone', 'with', 'the', 'wind'), 33.36225260542113), (('smokey', 'and', 'the', 'bandit'), 33.1809154498319), (('the', 'hills', 'have', 'eyes'), 33.16502217358332), (('the', 'deathly', 'hallows', 'part'), 33.16422039792342), (('madea', 'big', 'happy', 'family'), 33.01824819521629), (('heroes', 'might', 'and', 'magic'), 32.987618610407296), (('arkham', 'city', 'collectors', 'edition'), 32.97476073686836), (('wwe', 'money', 'the', 'bank'), 32.853636456327614), (('avatar', 'the', 'last', 'airbender'), 32.8523803003572), (('night', 'the', 'sun', 'came'), 32.702261851257084), (('two', 'and', 'half', 'men'), 32.678507888670175), (('nikon', 'cool', 'pix', 'l120'), 32.67540370937426), (('life', 'the', 'american', 'teenager'), 32.631549266577224), (('cabelas', 'big', 'game', 'hunter'), 32.35664678757833), (('harry', 'potter', 'deathly', 'hallows'), 31.87057866544472), (('modern', 'warfare', 'hardened', 'edition'), 31.750561552206214), (('grand', 'theft', 'auto', 'san'), 31.692927333143764), (('multi', 'region', 'dvd', 'player'), 31.53816659738837), (('jem', 'and', 'the', 'holograms'), 31.48047573169081), (('let', 'the', 'right', 'one'), 31.35506310814555), (('god', 'war', 'origins', 'collection'), 31.163591819396025), (('for', 'speed', 'hot', 'pursuit'), 31.138520306997982), (('harry', 'potter', 'ultimate', 'edition'), 30.911314334479968), (('xbox', 'live', 'gold', 'membership'), 30.84823021807143), (('met', 'your', 'mother', 'season'), 
30.767736115992875), (('last', 'house', 'the', 'left'), 30.755609615099225), (('gears', 'war', 'limited', 'edition'), 30.64496356516023), (('country', 'for', 'old', 'men'), 30.468165031208905), (('the', 'thin', 'red', 'line'), 30.399671230249872), (('upon', 'time', 'the', 'west'), 30.305264279378605), (('kitchen', 'aid', 'stand', 'mixer'), 30.19467886276906), (('call', 'duty', 'black', 'ops'), 30.069849341515535), (('transformers', 'revenge', 'the', 'fallen'), 30.01522751856605), (('nightmare', 'elm', 'street', 'blu'), 30.011258308898306), (('and', 'the', 'chocolate', 'factory'), 30.004326718108572), (('new', 'kids', 'the', 'block'), 29.95465962098214), (('world', 'the', 'sideline', 'story'), 29.895513230969648), (('fox', 'and', 'the', 'hound'), 29.721483831194597), (('scott', 'pilgrim', 'the', 'world'), 29.634947080726604), (('return', 'the', 'living', 'dead'), 29.62869361269818), (('beauty', 'and', 'the', 'beast'), 29.513490788918766), (('full', 'motion', 'wall', 'mount'), 29.387658733339585), (('wonka', 'and', 'the', 'chocolate'), 29.286097686523952), (('mophie', 'juice', 'pack', 'air'), 29.266492964182795), (('save', 'the', 'last', 'dance'), 29.237962607159062), (('jay', 'and', 'kanye', 'west'), 29.229860673685003), (('secret', 'life', 'the', 'american'), 29.199703479611827), (('sony', 'cyber', 'shot', 'wx9'), 29.192264941542035), (('and', 'the', 'deathly', 'hallows'), 29.094942226722054), (('batman', 'arkham', 'city', 'collectors'), 28.914889280890968), (('stackable', 'washer', 'and', 'dryer'), 28.872233357611506), (('star', 'wars', 'complete', 'saga'), 28.86716866976844), (('dark', 'night', 'the', 'scarecrow'), 28.78972469250742), (('hands', 'free', 'car', 'kit'), 28.741180358757354), (('soulja', 'boy', 'the', 'movie'), 28.715090816789967), (('guitar', 'hero', 'warriors', 'rock'), 28.69681079612871), (('big', 'bang', 'theory', 'season'), 28.669684979325496), (('just', 'dance', 'summer', 'party'), 28.66966829221179), (('for', 'speed', 'the', 'run'), 
28.633530352180145), (('the', 'nightmare', 'before', 'christmas'), 28.59866547797577), (('dance', 'the', 'vampire', 'bund'), 28.407887608601378), (('call', 'duty', 'modern', 'warfare'), 28.380301308545285), (('the', 'caribbean', 'stranger', 'tides'), 28.364520540662774), (('princess', 'and', 'the', 'frog'), 28.265036070996125), (('gears', 'war', 'season', 'pass'), 28.24544484794025), (('rage', 'against', 'the', 'machine'), 28.097972450901565), (('willy', 'wonka', 'and', 'the'), 28.005989767331215), (('pink', 'floyd', 'dark', 'side'), 27.749729416712483), (('ipod', 'nano', '6th', 'generation'), 27.73231483411537), (('watch', 'the', 'throne', 'deluxe'), 27.729417854643657), (('cell', 'phone', 'signal', 'boosters'), 27.725721975522646), (('mini', 'display', 'port', 'hdmi'), 27.590309774443966), (('queens', 'the', 'stone', 'age'), 27.515259342841873), (('xbox', 'data', 'transfer', 'cable'), 27.498911049602), (('duty', 'modern', 'warfare', 'hardened'), 27.48354819763172), (('need', 'for', 'speed', 'hot'), 27.48316847838543), (('wars', 'the', 'force', 'unleashed'), 27.433785184619254), (('canon', 'eos', 'rebel', 't3i'), 27.426575269754345), (('house', 'the', 'dead', 'overkill'), 27.363292192320472), (('blue', 'tooth', 'head', 'sets'), 27.345336912297462), (('night', 'the', 'living', 'dead'), 27.25785591732987), (('sony', 'ericsson', 'xperia', 'play'), 27.25506408052312), (('the', 'carribean', 'stranger', 'tides'), 27.224858797419493), (('canon', 'eos', 'rebel', 't2i'), 27.15618336346946), (('samsung', 'french', 'door', 'refrigerator'), 27.007749393534418), (('cell', 'phone', 'signal', 'booster'), 26.983009187305278), (('batman', 'the', 'animated', 'series'), 26.955013476702696), (('ipod', 'nano', '5th', 'generation'), 26.93876571158279), (('rings', 'war', 'the', 'north'), 26.90532213952374), (('cole', 'world', 'the', 'sideline'), 26.881707431444617), (('the', 'legend', 'zelda', 'skyward'), 26.874965853565563), (('starwars', 'the', 'old', 'republic'), 26.70948942248424), 
(('dark', 'side', 'the', 'moon'), 26.544528141793975), (('live', 'free', 'die', 'hard'), 26.538467161258673), (('the', 'phantom', 'the', 'opera'), 26.535998782904315), (('one', 'tree', 'hill', 'season'), 26.44013411196446), (('sony', 'extra', 'bass', 'headphones'), 26.338860212322004), (('black', 'berry', 'play', 'book'), 26.20105024919014), (('lord', 'the', 'rings', 'extended'), 26.196541684981298), (('ipod', 'touch', '3rd', 'gen'), 26.136190483149008), (('potter', 'and', 'the', 'deathly'), 26.10110321134873), (('lion', 'king', 'diamond', 'edition'), 25.952001735643748), (('elm', 'street', 'blu', 'ray'), 25.927063181481877), (('sex', 'and', 'the', 'city'), 25.861906939792917), (('harry', 'potter', 'box', 'set'), 25.817273935746627), (('blue', 'tooth', 'head', 'set'), 25.800267138699816), (('the', 'big', 'bang', 'theory'), 25.783333567858676), (('pulp', 'fiction', 'blu', 'ray'), 25.763999844638015), (('pci', 'express', 'video', 'card'), 25.741580857529947), (('digital', 'analog', 'audio', 'converter'), 25.704574438405608), (('play', 'and', 'charge', 'kit'), 25.62520024705519), (('and', 'shadow', 'the', 'colossus'), 25.618673025610825), (('lord', 'the', 'rings', 'trilogy'), 25.595637640391118), (('ipod', 'touch', '4th', 'gen'), 25.54174373308446), (('america', 'the', 'first', 'avenger'), 25.522013163474135), (('ipod', 'touch', '2nd', 'generation'), 25.49461464600477), (('call', 'duty', 'world', 'war'), 25.45498911850042), (('pirates', 'the', 'caribbean', 'stranger'), 25.40956150488627), (('touch', '4th', 'generation', 'white'), 25.290765344825736), (('washer', 'and', 'dryer', 'combo'), 25.280607126609347), (('florence', 'and', 'the', 'machine'), 25.26783905427709), (('wars', 'the', 'old', 'republic'), 25.237053433833758), (('over', 'the', 'range', 'microwave'), 25.211014860017855), (('memory', 'stick', 'pro', 'duo'), 25.18281964215418), (('guitar', 'hero', 'world', 'tour'), 25.056194226330604), (('htc', 'evo', 'extended', 'battery'), 24.826071932475834), (('high', 
'school', 'the', 'dead'), 24.79414805979774), (('office', 'home', 'and', 'student'), 24.776782070972544), (('ipod', 'touch', '4th', 'generation'), 24.74987170124684), (('transformers', 'dark', 'the', 'moon'), 24.641999879665107), (('star', 'wars', 'old', 'republic'), 24.630075749768295), (('acer', 'iconia', 'tab', 'a500'), 24.618804113040902), (('wars', 'the', 'complete', 'saga'), 24.602661094356584), (('need', 'for', 'speed', 'the'), 24.582249847236447), (('microsoft', 'home', 'and', 'student'), 24.548003206976432), (('with', 'built', 'dvd', 'player'), 24.39520864354632), (('solid', 'state', 'hard', 'drive'), 24.36081542311903), (('water', 'proof', 'digital', 'cameras'), 24.35004411008986), (('ipod', 'touch', '3rd', 'generation'), 24.349184206483216), (('the', 'night', 'the', 'sun'), 24.308682117573667), (('pirates', 'the', 'carribean', 'stranger'), 24.269899761642982), (('over', 'the', 'range', 'microwaves'), 24.234911875972855), (('fast', 'and', 'the', 'furious'), 24.219206518082807), (('floyd', 'dark', 'side', 'the'), 24.015201162917194), (('ipad', 'camera', 'connection', 'kit'), 24.00704835066241), (('star', 'wars', 'bly', 'ray'), 23.945912113478222), (('car', 'stereo', 'installation', 'kit'), 23.83718499799747), (('turtle', 'beach', 'head', 'set'), 23.809163285493256), (('touch', '4th', 'generation', 'cases'), 23.518335070004724), (('men', 'first', 'class', 'blu'), 23.50496738075465), (('lil', 'wayne', 'tha', 'carter'), 23.415471252639385), (('asus', 'eee', 'pad', 'transformer'), 23.38100930406324), (('ipod', 'touch', '5th', 'generation'), 23.379137428584833), (('winnie', 'the', 'pooh', 'blu'), 23.270690113907897), (('transformers', 'dark', 'side', 'the'), 23.24934923248462), (('harry', 'potter', 'and', 'the'), 23.107264195975404), (('sony', 'cyber', 'shot', 'camera'), 23.086155037697168), (('skull', 'candy', 'head', 'phones'), 23.04420571592307), (('horrible', 'bosses', 'blu', 'ray'), 23.01008376824977), (('polk', 'audio', 'sound', 'bar'), 
22.94142846390266), (('touch', '4th', 'generation', 'case'), 22.935093364049642), (('head', 'set', 'for', 'ps3'), 22.91110439878014), (('over', 'the', 'ear', 'headphones'), 22.90734836038198), (('microwave', 'over', 'the', 'range'), 22.808916416446515), (('jurassic', 'park', 'blu', 'ray'), 22.749164901513048), (('water', 'proof', 'digital', 'camera'), 22.706515127684696), (('touch', '3rd', 'generation', 'case'), 22.6994651155565), (('ico', 'and', 'shadow', 'the'), 22.6926736070546), (('over', 'the', 'ear', 'headphone'), 22.681059354420526), (('lord', 'the', 'rings', 'bluray'), 22.459740042749004), (('dark', 'the', 'moon', 'movie'), 22.236579257699454), (('hello', 'kitty', 'laptop', 'sleeve'), 22.210761610417727), (('rise', 'the', 'planet', 'the'), 22.18932594053399), (('first', 'class', 'blu', 'ray'), 22.17090230670287), (('xmen', 'first', 'class', 'blu'), 22.130769299117098), (('head', 'and', 'the', 'heart'), 22.10266931717463), (('all', 'one', 'desktop', 'computers'), 21.997844533614575), (('green', 'lantern', 'blu', 'ray'), 21.990481631896373), (('the', 'last', 'house', 'the'), 21.829155891843115), (('men', 'first', 'class', 'blue'), 21.774041335088008), (('flat', 'screen', 'wall', 'mount'), 21.769443483790575), (('phillips', 'home', 'theater', 'system'), 21.72571772800589), (('captain', 'america', 'blu', 'ray'), 21.662083131333112), (('the', 'secret', 'life', 'the'), 21.65431708451319), (('3rd', 'generation', 'ipod', 'touch'), 21.62671818201212), (('star', 'wars', 'clone', 'wars'), 21.593545169855872), (('dell', 'inspiron', 'power', 'cord'), 21.573665094175354), (('lord', 'the', 'rings', 'blu'), 21.544951775164947), (('dance', 'dance', 'revolution', 'wii'), 21.49701716338378), (('high', 'speed', 'hdmi', 'cable'), 21.47457882371684), (('virgin', 'mobile', 'motorola', 'triumph'), 21.474041844547777), (('microsoft', 'word', 'for', 'mac'), 21.452425823382868), (('star', 'wars', 'blu', 'ray'), 21.34707137241753), (('dance', 'dance', 'revolution', 'ps3'), 
21.22417888858245), (('htc', 'evo', 'shift', 'phone'), 21.150825107463426), (('family', 'guy', 'star', 'wars'), 21.134347883186024), (('the', 'fast', 'and', 'furious'), 21.119670844531896), (('gateway', 'all', 'one', 'computer'), 21.10685004396003), (('pink', 'floyd', 'the', 'wall'), 21.030852990070073), (('lord', 'the', 'rings', 'blue'), 20.983950730940613), (('star', 'wars', 'the', 'old'), 20.90846746626692), (('fast', 'five', 'blu', 'ray'), 20.907733506334154), (('just', 'dance', 'for', 'wii'), 20.90186272199287), (('4th', 'generation', 'ipod', 'touch'), 20.862346430505255), (('walking', 'dead', 'blu', 'ray'), 20.859933308821056), (('jurassic', 'park', 'blue', 'ray'), 20.837666610204586), (('microsoft', 'office', 'for', 'mac'), 20.795701213413935), (('lord', 'the', 'rings', 'war'), 20.668282942222888), (('western', 'digital', 'book', 'live'), 20.659508383111316), (('lion', 'king', 'blu', 'ray'), 20.656524327226805), (('harry', 'potter', 'blu', 'ray'), 20.642246761712173), (('transformer', 'dark', 'the', 'moon'), 20.550853780043383), (('evo', 'shift', 'phone', 'case'), 20.516134744153412), (('blue', 'tooth', 'head', 'phones'), 20.49959597968288), (('ipod', 'touch', 'arm', 'band'), 20.492380795172572), (('the', 'planet', 'the', 'apes'), 20.492253073491298), (('toshiba', 'thrive', 'screen', 'protector'), 20.43524996828083), (('1tb', 'external', 'hard', 'drive'), 20.423000921166818), (('htc', 'evo', 'shift', 'case'), 20.337383158878076), (('the', 'lion', 'king', 'movie'), 20.32727210115261), (('xbox', 'wireless', 'networking', 'adapter'), 20.29832642247193), (('iphone', '3gs', 'screen', 'protector'), 20.262891888721647), (('batman', 'arkham', 'city', 'ps3'), 20.253111183118975), (('the', 'fox', 'and', 'the'), 20.04779617831845), (('wars', 'the', 'clone', 'wars'), 20.00460264394608), (('2tb', 'external', 'hard', 'drive'), 19.9894130255162), (('dual', 'band', 'wireless', 'router'), 19.987051698983485), (('western', 'digital', 'media', 'player'), 19.943571664012545), 
(('3tb', 'external', 'hard', 'drive'), 19.93757409399658), (('first', 'class', 'blue', 'ray'), 19.909461544337454), (('headphones', 'over', 'the', 'ear'), 19.90734836038198), (('green', 'lantern', 'blue', 'ray'), 19.894558769450484), (('star', 'wars', 'the', 'clone'), 19.878410232410737), (('samsung', 'wireless', 'lan', 'adapter'), 19.87435889836354), (('star', 'wars', 'blue', 'ray'), 19.856177907223064), (('fast', 'five', 'blue', 'ray'), 19.835770542832442), (('dark', 'the', 'moon', 'blu'), 19.79493002817329), (('dvd', 'players', 'for', 'cars'), 19.78900762547591), (('wireless', 'keyboard', 'and', 'mouse'), 19.71833992213852), (('turtle', 'beach', 'for', 'ps3'), 19.602142928297773), (('cases', 'for', 'htc', 'inspire'), 19.55749210342786), (('lil', 'wayne', 'the', 'carter'), 19.48613693932632), (('captain', 'america', 'the', 'first'), 19.45705772163337), (('virgin', 'mobile', 'cell', 'phones'), 19.438065954481274), (('boost', 'mobile', 'cell', 'phone'), 19.363345285042705), (('lion', 'king', 'blue', 'ray'), 19.331237082836765), (('star', 'wars', 'the', 'force'), 19.2786507297615), (('external', 'hard', 'drive', '2tb'), 19.252447431349992), (('seagate', 'internal', 'hard', 'drive'), 19.24396815807507), (('panasonic', 'home', 'theater', 'system'), 19.2283242779098), (('star', 'wars', 'box', 'set'), 19.217810610700347), (('star', 'wars', 'the', 'complete'), 19.134027166682046), (('the', 'crow', 'blu', 'ray'), 19.111224091466553), (('bose', 'home', 'theater', 'system'), 19.08752663917263), (('captain', 'america', 'blue', 'ray'), 19.078714274080326), (('sony', 'over', 'ear', 'headphones'), 19.04653857951996), (('sony', 'over', 'the', 'ear'), 19.032693956124078), (('usb', 'wireless', 'lan', 'adapter'), 18.8908194501309), (('digital', 'optical', 'audio', 'cable'), 18.87412111568421), (('optical', 'digital', 'audio', 'cable'), 18.87412111568421), (('hello', 'kitty', 'phone', 'case'), 18.855828503063776), (('star', 'wars', 'trilogy', 'dvd'), 18.622924468551965), (('the', 
'walking', 'dead', 'blu'), 18.485453674332184), (('the', 'rings', 'blu', 'ray'), 18.482782954078523), (('seagate', 'external', 'hard', 'drive'), 18.336980941561656), (('sata', 'hard', 'drive', 'enclosure'), 18.30487400532793), (('dual', 'screen', 'portable', 'dvd'), 18.303648414430562), (('sony', 'home', 'theater', 'system'), 18.302519940153175), (('verizon', 'wireless', 'cell', 'phones'), 18.268783678003977), (('blue', 'ray', 'surround', 'sound'), 18.256419825395703), (('the', 'lord', 'the', 'rings'), 18.230072282877302), (('blue', 'ray', 'home', 'theater'), 18.219208789029835), (('the', 'pooh', 'blu', 'ray'), 18.208521292821466), (('htc', 'inspire', 'phone', 'cases'), 18.18234676072715), (('blu', 'ray', 'surround', 'sound'), 18.08045527545383), (('lap', 'top', 'hard', 'drive'), 18.063642119731988), (('samsung', 'all', 'one', 'computer'), 18.018683963582497), (('sony', 'home', 'theatre', 'system'), 17.98919840052993), (('bluetooth', 'keyboard', 'and', 'mouse'), 17.92190482310135), (('the', 'rings', 'blue', 'ray'), 17.921781909854182), (('external', 'blu', 'ray', 'burner'), 17.820030878871556), (('htc', 'inspire', 'phone', 'case'), 17.809672040711718), (('the', 'moon', 'blu', 'ray'), 17.688367087728402), (('xbox', 'slim', 'hard', 'drive'), 17.6301422774159), (('internal', 'hard', 'drive', 'desktop'), 17.5320935448717), (('samsung', 'home', 'theater', 'system'), 17.50256413845147), (('hello', 'kitty', 'ipod', 'cases'), 17.48249199899773), (('car', 'stereo', 'with', 'bluetooth'), 17.458731428977487), (('cell', 'phone', 'car', 'mount'), 17.38566920767925), (('blu', 'ray', 'home', 'theater'), 17.365172333975323), (('western', 'digital', 'external', 'hard'), 17.3414472530553), (('western', 'digital', 'hard', 'drive'), 17.3414472530553), (('dual', 'screen', 'dvd', 'player'), 17.328939540960427), (('sony', 'network', 'media', 'player'), 17.251232840676956), (('apple', 'mac', 'book', 'pro'), 17.188297925284616), (('dark', 'the', 'moon', 'dvd'), 17.079979150144304), 
(('wireless', 'home', 'theater', 'system'), 17.059460113315126), (('ipod', 'touch', 'screen', 'protector'), 17.023411691300204), (('the', 'lion', 'king', 'blu'), 17.01525815204304), (('dual', 'portable', 'dvd', 'player'), 16.952501690348008), (('hello', 'kitty', 'laptop', 'case'), 16.915183976805146), (('hello', 'kitty', 'iphone', 'case'), 16.882123278964606), (('wireless', 'surround', 'sound', 'system'), 16.874278728344542), (('htc', 'evo', 'phone', 'case'), 16.866474729274096), (('htc', 'evo', 'phone', 'cases'), 16.864753934508023), (('phone', 'cases', 'for', 'htc'), 16.86456399374798), (('the', 'fast', 'and', 'the'), 16.8386510808618), (('dell', 'laptop', 'power', 'cord'), 16.776999490276957), (('external', 'hard', 'drive', 'for'), 16.763369365177034), (('touch', 'screen', 'car', 'stereos'), 16.678208126457008), (('panasonic', 'blu', 'ray', 'player'), 16.633785858989455), (('mac', 'book', 'pro', 'battery'), 16.629675920346727), (('blue', 'ray', 'dvd', 'player'), 16.627271669550574), (('wireless', 'card', 'for', 'desktop'), 16.59261635525904), (('portable', 'blu', 'ray', 'player'), 16.580480356289996), (('ipod', 'touch', 'with', 'camera'), 16.5310845033099), (('apple', 'mac', 'book', 'air'), 16.515950665272868), (('seagate', 'portable', 'hard', 'drive'), 16.489821631240027), (('wireless', 'mouse', 'and', 'keyboard'), 16.485679165348245), (('hello', 'kitty', 'ipod', 'case'), 16.484212793763803), (('external', 'hard', 'drive', 'enclosure'), 16.401939305638997), (('touch', 'screen', 'car', 'stereo'), 16.343489791689912), (('the', 'rings', 'war', 'the'), 16.231634486647586), (('external', 'hard', 'drive', 'western'), 16.215108069525492), (('external', 'hard', 'drive', 'firewire'), 16.13995070995675), (('hard', 'drive', 'for', 'mac'), 16.13738561600087), (('toshiba', 'laptop', 'power', 'cord'), 16.076491461094356), (('otterbox', 'for', 'ipod', 'touch'), 16.04402111256252), (('pirates', 'the', 'caribbean', 'dvd'), 15.943485043490014), (('samsung', 'galaxy', 'tab', 
'accessories'), 15.923544777657256), (('mac', 'book', 'pro', 'charger'), 15.822210017478966), (('the', 'lion', 'king', 'dvd'), 15.759738892651356), (('hard', 'drive', 'western', 'digital'), 15.663375347942662), (('netgear', 'wireless', 'usb', 'adapter'), 15.638538100593983), (('sony', 'blu', 'ray', 'player'), 15.589337024734213), (('usb', 'wireless', 'network', 'adapter'), 15.537803677459443), (('the', 'head', 'and', 'the'), 15.528517337849394), (('wireless', 'surround', 'sound', 'speakers'), 15.425540277408032), (('sony', 'blue', 'ray', 'player'), 15.4015533040941), (('hdmi', 'cable', 'for', 'xbox'), 15.366669862049548), (('samsung', 'blu', 'ray', 'player'), 15.355484696502351), (('dvd', 'player', 'for', 'car'), 15.345806940602436), (('mac', 'book', 'pro', 'case'), 15.344859480114621), (('blue', 'ray', 'dvd', 'players'), 15.304116271279213), (('touch', 'screen', 'digital', 'cameras'), 15.203938045841973), (('htc', 'evo', 'hdmi', 'cable'), 15.055055771332079), (('blu', 'ray', 'dvd', 'player'), 15.011394951690825), (('touch', 'screen', 'car', 'audio'), 14.865950873949977), (('ipod', 'adapter', 'for', 'car'), 14.715890858037625), (('car', 'charger', 'for', 'laptop'), 14.562870258449543), (('blu', 'ray', 'dvd', 'players'), 14.4500798162247), (('samsung', 'blu', 'ray', 'players'), 14.410840921484713), (('portable', 'external', 'hard', 'drive'), 14.356151950980674), (('car', 'stereo', 'touch', 'screen'), 14.343489791689905), (('internal', 'hard', 'drive', 'laptop'), 14.316598862532082), (('internal', 'laptop', 'hard', 'drive'), 14.316598862532082), (('screen', 'portable', 'dvd', 'player'), 14.313323070806248), (('samsung', 'blue', 'ray', 'player'), 14.302630555948348), (('cases', 'for', 'ipod', 'touch'), 14.132045860359739), (('samsung', 'galaxy', 'tablet', 'accessories'), 14.072807865728372), (('external', 'blu', 'ray', 'drive'), 13.968551536572335), (('external', 'hard', 'drive', 'case'), 13.951851722970787), (('usb', 'hard', 'drive', 'adapter'), 13.91501629330115), 
(('toshiba', 'external', 'hard', 'drive'), 13.86516559846853), (('touch', 'screen', 'digital', 'camera'), 13.8234434692706), (('sony', 'camera', 'battery', 'charger'), 13.803481800916643), (('ipod', 'touch', 'wall', 'charger'), 13.728060315242878), (('digital', 'external', 'hard', 'drive'), 13.607184420361023), (('samsung', 'galaxy', 'tab', 'case'), 13.588934412408989), (('external', 'usb', 'hard', 'drive'), 13.394163126005623), (('sony', 'vaio', 'laptop', 'charger'), 13.308128234694387), (('external', 'portable', 'hard', 'drive'), 13.21864842723074), (('external', 'hard', 'drive', 'mac'), 13.117895893488843), (('samsung', 'galaxy', 'tab', 'cases'), 12.945667588555395), (('mac', 'external', 'hard', 'drive'), 12.85486148765505), (('samsung', 'blue', 'ray', 'dvd'), 12.583435760700496), (('sony', 'portable', 'dvd', 'player'), 12.530704389359201), (('wireless', 'external', 'hard', 'drive'), 12.05062234544139), (('portable', 'car', 'dvd', 'player'), 11.45316269923584), (('ipod', 'touch', 'hard', 'case'), 10.861017640753033)]\n"
     ]
    }
   ],
   "source": [
    "## check PMI score\n",
    "print(f\"Bigrams: {finder_bi.score_ngrams(bigram_measures.pmi)[:20]}\")\n",
    "print(f\"Trigrams: {finder_tri.score_ngrams(trigram_measures.pmi)[:20]}\")\n",
    "print(f\"Quadgrams: {finder_quad.score_ngrams(quadgram_measures.pmi)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4: combine candidate list from PMI and likelihood ratio\n",
    "only keep phrases that shown in top 1000 in both lists. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "intersection=[]\n",
    "for finder_tup in [(finder_bi,bigram_measures),(finder_tri,trigram_measures),(finder_quad,quadgram_measures)]:\n",
    "#for finder_tup in [(finder_bi,bigram_measures),(finder_tri,trigram_measures)]:\n",
    "    \n",
    "    finder=finder_tup[0]\n",
    "    finder_measure=finder_tup[1]\n",
    "    intersection += [value for value in finder.nbest(finder_measure.likelihood_ratio, 1000) \n",
    "                     if value in finder.nbest(finder_measure.pmi, 1000)] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('puddle', 'mudd'),\n",
       " ('tim', 'mcgraw'),\n",
       " ('chemical', 'romance'),\n",
       " ('always', 'sunny'),\n",
       " ('avril', 'lavigne'),\n",
       " ('carrie', 'underwood'),\n",
       " ('smashing', 'pumpkins'),\n",
       " ('velvet', 'revolver'),\n",
       " ('pans', 'labyrinth'),\n",
       " ('various', 'artists'),\n",
       " ('bobby', 'valentino'),\n",
       " ('brad', 'paisley'),\n",
       " ('stranger', 'tides'),\n",
       " ('boardwalk', 'empire'),\n",
       " ('before', 'christmas'),\n",
       " ('its', 'always'),\n",
       " ('assassins', 'creed'),\n",
       " ('brantley', 'gilbert'),\n",
       " ('double', 'din'),\n",
       " ('criminal', 'minds'),\n",
       " ('mortal', 'kombat'),\n",
       " ('skyward', 'sword'),\n",
       " ('take', 'care'),\n",
       " ('pulp', 'fiction'),\n",
       " ('jeff', 'dunham'),\n",
       " ('cyber', 'shot'),\n",
       " ('kitchen', 'aid'),\n",
       " ('deathly', 'hallows'),\n",
       " ('david', 'guetta'),\n",
       " ('ace', 'combat'),\n",
       " ('noise', 'canceling'),\n",
       " ('french', 'door'),\n",
       " ('taylor', 'swift'),\n",
       " ('heart', 'rate'),\n",
       " ('mass', 'effect'),\n",
       " ('kung', 'panda'),\n",
       " ('wears', 'prada'),\n",
       " ('hocus', 'pocus'),\n",
       " ('mindless', 'behavior'),\n",
       " ('paranormal', 'activity'),\n",
       " ('foo', 'fighters'),\n",
       " ('gossip', 'girl'),\n",
       " ('something', 'borrowed'),\n",
       " ('ncaa', 'football'),\n",
       " ('sunny', 'philadelphia'),\n",
       " ('pistol', 'annies'),\n",
       " ('randy', 'orton'),\n",
       " ('saints', 'row'),\n",
       " ('rosetta', 'stone'),\n",
       " ('devil', 'wears'),\n",
       " ('elder', 'scrolls'),\n",
       " ('rick', 'ross'),\n",
       " ('professor', 'layton'),\n",
       " ('jill', 'scott'),\n",
       " ('britney', 'spears'),\n",
       " ('elm', 'street'),\n",
       " ('snow', 'leopard'),\n",
       " ('noise', 'cancelling'),\n",
       " ('doctor', 'who'),\n",
       " ('age', 'empires'),\n",
       " ('willy', 'wonka'),\n",
       " ('leap', 'frog'),\n",
       " ('flight', 'simulator'),\n",
       " ('avenged', 'sevenfold'),\n",
       " ('demi', 'lovato'),\n",
       " ('ben', 'hur'),\n",
       " ('gran', 'turismo'),\n",
       " ('citizen', 'kane'),\n",
       " ('tiger', 'woods'),\n",
       " ('spooky', 'buddies'),\n",
       " ('thermal', 'paste'),\n",
       " ('display', 'port'),\n",
       " ('hank', 'williams'),\n",
       " ('alice', 'cooper'),\n",
       " ('mylo', 'xyloto'),\n",
       " ('juice', 'pack'),\n",
       " ('desperate', 'housewives'),\n",
       " ('complete', 'saga'),\n",
       " ('toy', 'story'),\n",
       " ('janes', 'addiction'),\n",
       " ('coast', 'customs'),\n",
       " ('tech', 'n9ne'),\n",
       " ('marvel', 'capcom'),\n",
       " ('jane', 'eyre'),\n",
       " ('jersey', 'shore'),\n",
       " ('leandria', 'johnson'),\n",
       " ('street', 'fighter'),\n",
       " ('scooby', 'doo'),\n",
       " ('hands', 'free'),\n",
       " ('kevin', 'hart'),\n",
       " ('rice', 'cooker'),\n",
       " ('tori', 'amos'),\n",
       " ('rockford', 'fosgate'),\n",
       " ('alien', 'ware'),\n",
       " ('cool', 'pix'),\n",
       " ('country', 'strong'),\n",
       " ('monte', 'carlo'),\n",
       " ('jake', 'owen'),\n",
       " ('deep', 'fryer'),\n",
       " ('little', 'mermaid'),\n",
       " ('driver', 'san'),\n",
       " ('hallows', 'part'),\n",
       " ('little', 'liars'),\n",
       " ('tribe', 'called'),\n",
       " ('definitive', 'technology'),\n",
       " ('battle', 'field'),\n",
       " ('tyler', 'perry'),\n",
       " ('american', 'capitalist'),\n",
       " ('breaking', 'benjamin'),\n",
       " ('luke', 'bryan'),\n",
       " ('medal', 'honor'),\n",
       " ('kingdom', 'hearts'),\n",
       " ('bruno', 'mars'),\n",
       " ('selena', 'gomez'),\n",
       " ('burn', 'notice'),\n",
       " ('universe', 'online'),\n",
       " ('came', 'romans'),\n",
       " ('straw', 'dogs'),\n",
       " ('called', 'quest'),\n",
       " ('that', '70s'),\n",
       " ('toby', 'keith'),\n",
       " ('cobra', 'starship'),\n",
       " ('williams', 'iii'),\n",
       " ('butch', 'walker'),\n",
       " ('transform', 'ultra'),\n",
       " ('fullmetal', 'alchemist'),\n",
       " ('kid', 'cudi'),\n",
       " ('counting', 'crows'),\n",
       " ('jimi', 'hendrix'),\n",
       " ('san', 'francisco'),\n",
       " ('nicki', 'minaj'),\n",
       " ('wiz', 'khalifa'),\n",
       " ('creed', 'revelations'),\n",
       " ('70s', 'show'),\n",
       " ('love', 'lucy'),\n",
       " ('wrong', 'turn'),\n",
       " ('ninja', 'turtles'),\n",
       " ('johnny', 'gill'),\n",
       " ('chris', 'brown'),\n",
       " ('found', 'glory'),\n",
       " ('sansa', 'clip'),\n",
       " ('dawsons', 'creek'),\n",
       " ('gucci', 'mane'),\n",
       " ('serbian', 'film'),\n",
       " ('sleepy', 'hollow'),\n",
       " ('soda', 'stream'),\n",
       " ('george', 'foreman'),\n",
       " ('tracking', 'device'),\n",
       " ('brides', 'maids'),\n",
       " ('jason', 'aldean'),\n",
       " ('katy', 'perry'),\n",
       " ('garth', 'brooks'),\n",
       " ('lupe', 'fiasco'),\n",
       " ('grave', 'encounters'),\n",
       " ('good', 'wife'),\n",
       " ('league', 'legends'),\n",
       " ('rocket', 'fish'),\n",
       " ('crazy', 'stupid'),\n",
       " ('under', 'cabinet'),\n",
       " ('eric', 'church'),\n",
       " ('gavin', 'degraw'),\n",
       " ('star', 'wars', 'complete'),\n",
       " ('star', 'wars', 'blurry'),\n",
       " ('star', 'wars', 'bly'),\n",
       " ('lion', 'king', 'movie'),\n",
       " ('men', 'first', 'class'),\n",
       " ('lil', 'wayne', 'ceilings'),\n",
       " ('xmen', 'first', 'class'),\n",
       " ('turtle', 'beach', 'x11'),\n",
       " ('turtle', 'beach', 'x41'),\n",
       " ('turtle', 'beach', 'x12'),\n",
       " ('turtle', 'beach', 'px5'),\n",
       " ('turtle', 'beach', 'x31'),\n",
       " ('turtle', 'beach', 'px21'),\n",
       " ('turtle', 'beach', 'p11'),\n",
       " ('turtle', 'beach', 'dx11'),\n",
       " ('turtle', 'beach', 'px3'),\n",
       " ('turtle', 'beach', 'dpx21'),\n",
       " ('turtle', 'beach', 'xp500'),\n",
       " ('batman', 'arkham', 'city'),\n",
       " ('green', 'lantern', 'movie'),\n",
       " ('gears', 'war', 'vault'),\n",
       " ('big', 'bang', 'theory'),\n",
       " ('gears', 'war', 'limited'),\n",
       " ('watch', 'the', 'throne'),\n",
       " ('guitar', 'hero', 'warriors'),\n",
       " ('pirates', 'the', 'caribbean'),\n",
       " ('call', 'duty', 'modern'),\n",
       " ('home', 'theater', 'system'),\n",
       " ('call', 'duty', 'black'),\n",
       " ('duty', 'modern', 'warfare'),\n",
       " ('call', 'duty', 'mw3'),\n",
       " ('call', 'duty', 'world'),\n",
       " ('call', 'duty', 'elite'),\n",
       " ('call', 'duty', 'hardened'),\n",
       " ('lord', 'the', 'rings'),\n",
       " ('fast', 'five', 'movie'),\n",
       " ('home', 'theater', 'systems'),\n",
       " ('digital', 'picture', 'frame'),\n",
       " ('modern', 'warfare', 'hardened'),\n",
       " ('home', 'theater', 'sistem'),\n",
       " ('arkham', 'city', 'collectors'),\n",
       " ('microsoft', 'office', 'professional'),\n",
       " ('dark', 'the', 'moon'),\n",
       " ('harry', 'potter', 'and'),\n",
       " ('lego', 'harry', 'potter'),\n",
       " ('harry', 'potter', 'part'),\n",
       " ('harry', 'potter', 'deathly'),\n",
       " ('harry', 'potter', 'collection'),\n",
       " ('harry', 'potter', 'ultimate'),\n",
       " ('harry', 'potter', 'set'),\n",
       " ('skull', 'candy', 'headphones'),\n",
       " ('otter', 'box', 'commuter'),\n",
       " ('skull', 'candy', 'titan'),\n",
       " ('skull', 'candy', 'hesh'),\n",
       " ('otter', 'box', 'defender'),\n",
       " ('skull', 'candy', 'earbuds'),\n",
       " ('atx', 'power', 'supply'),\n",
       " ('grand', 'theft', 'auto'),\n",
       " ('corsair', 'power', 'supply'),\n",
       " ('wall', 'mount', 'shelf'),\n",
       " ('12v', 'power', 'supply'),\n",
       " ('power', 'supply', 'unit'),\n",
       " ('motion', 'wall', 'mount'),\n",
       " ('memory', 'stick', 'duo'),\n",
       " ('surround', 'sound', 'systems'),\n",
       " ('sons', 'anarchy', 'season'),\n",
       " ('just', 'dance', 'summer'),\n",
       " ('portable', 'air', 'conditioner'),\n",
       " ('need', 'for', 'speed'),\n",
       " ('universal', 'remote', 'control'),\n",
       " ('pirates', 'the', 'carribean'),\n",
       " ('digital', 'picture', 'frames'),\n",
       " ('how', 'met', 'your'),\n",
       " ('ipod', 'nano', '6th'),\n",
       " ('one', 'tree', 'hill'),\n",
       " ('high', 'school', 'musical'),\n",
       " ('big', 'bang', 'theroy'),\n",
       " ('bang', 'theory', 'season'),\n",
       " ('window', 'air', 'conditioner'),\n",
       " ('digital', 'photo', 'frames'),\n",
       " ('batman', 'arkham', 'asylum'),\n",
       " ('htc', 'evo', 'shift'),\n",
       " ('god', 'war', 'origins'),\n",
       " ('met', 'your', 'mother'),\n",
       " ('cell', 'phone', 'booster'),\n",
       " ('command', 'and', 'conquer'),\n",
       " ('all', 'one', 'computer'),\n",
       " ('all', 'one', 'computers'),\n",
       " ('hot', 'chili', 'peppers'),\n",
       " ('god', 'war', 'collection'),\n",
       " ('finger', 'death', 'punch'),\n",
       " ('all', 'one', 'printer'),\n",
       " ('all', 'one', 'printers'),\n",
       " ('cell', 'phone', 'signal'),\n",
       " ('pirates', 'the', 'caribean'),\n",
       " ('red', 'hot', 'chili'),\n",
       " ('nine', 'inch', 'nails'),\n",
       " ('gateway', 'all', 'one'),\n",
       " ('its', 'always', 'sunny'),\n",
       " ('htc', 'evo', 'extended'),\n",
       " ('greys', 'anatomy', 'season'),\n",
       " ('pirates', 'the', 'carribbean'),\n",
       " ('cobra', 'radar', 'detector'),\n",
       " ('contract', 'cell', 'phones'),\n",
       " ('electronic', 'picture', 'frame'),\n",
       " ('pirates', 'the', 'carribian'),\n",
       " ('cell', 'phone', 'boosters'),\n",
       " ('pirates', 'the', 'carabean'),\n",
       " ('pirates', 'the', 'carrabean'),\n",
       " ('five', 'finger', 'death'),\n",
       " ('pearl', 'jam', 'twenty'),\n",
       " ('dragon', 'ball', 'kai'),\n",
       " ('dance', 'dance', 'revolution'),\n",
       " ('jurassic', 'park', 'blu'),\n",
       " ('jurassic', 'park', 'trilogy'),\n",
       " ('stick', 'pro', 'duo'),\n",
       " ('garmin', 'nuvi', '1450lmt'),\n",
       " ('acer', 'iconia', 'a500'),\n",
       " ('dragon', 'ball', 'ultimate'),\n",
       " ('fall', 'out', 'boy'),\n",
       " ('sex', 'and', 'the'),\n",
       " ('three', 'six', 'mafia'),\n",
       " ('transformers', 'dark', 'side'),\n",
       " ('flat', 'screen', 'tvs'),\n",
       " ('batman', 'arkam', 'city'),\n",
       " ('home', 'theatre', 'systems'),\n",
       " ('acer', 'iconia', 'a100'),\n",
       " ('western', 'digital', 'passport'),\n",
       " ('pink', 'floyd', 'discovery'),\n",
       " ('pink', 'floyd', 'sampler'),\n",
       " ('western', 'digital', '2tb'),\n",
       " ('western', 'digital', '1tb'),\n",
       " ('nightmare', 'before', 'christmas'),\n",
       " ('western', 'digital', 'scorpio'),\n",
       " ('batman', 'arkum', 'city'),\n",
       " ('the', 'caribbean', 'stranger'),\n",
       " ('side', 'the', 'moon'),\n",
       " ('the', 'throne', 'deluxe'),\n",
       " ('the', 'walking', 'dead'),\n",
       " ('stomp', 'the', 'yard'),\n",
       " ('potable', 'dvd', 'players'),\n",
       " ('xbox', 'live', 'gold'),\n",
       " ('rock', 'band', 'guitar'),\n",
       " ('how', 'train', 'your'),\n",
       " ('theft', 'auto', 'san'),\n",
       " ('xbox', 'live', 'membership'),\n",
       " ('xbox', 'live', 'month'),\n",
       " ('credit', 'card', 'reader'),\n",
       " ('watch', 'the', 'thrown'),\n",
       " ('final', 'fantasy', 'xiii'),\n",
       " ('lord', 'the', 'ring'),\n",
       " ('always', 'sunny', 'philadelphia'),\n",
       " ('square', 'card', 'reader'),\n",
       " ('lady', 'gaga', 'heartbeats'),\n",
       " ('jay', 'kanye', 'west'),\n",
       " ('tom', 'tom', 'xxl'),\n",
       " ('anti', 'virus', 'software'),\n",
       " ('final', 'fantasy', 'xiv'),\n",
       " ('and', 'kanye', 'west'),\n",
       " ('norton', 'anti', 'virus'),\n",
       " ('kelly', 'clarkson', 'stronger'),\n",
       " ('the', 'rings', 'extended'),\n",
       " ('batman', 'year', 'one'),\n",
       " ('for', 'speed', 'most'),\n",
       " ('duty', 'black', 'ops'),\n",
       " ('how', 'meet', 'your'),\n",
       " ('family', 'guy', 'volume'),\n",
       " ('for', 'speed', 'carbon'),\n",
       " ('tree', 'hill', 'season'),\n",
       " ('for', 'speed', 'undercover'),\n",
       " ('high', 'school', 'the'),\n",
       " ('for', 'speed', 'hot'),\n",
       " ('sharp', 'aquos', 'quattron'),\n",
       " ('canon', 'powershot', 'sx30'),\n",
       " ('canon', 'powershot', 's95'),\n",
       " ('canon', 'powershot', 'elph'),\n",
       " ('spider', 'man', 'edge'),\n",
       " ('michael', 'buble', 'christmas'),\n",
       " ('canon', 'powershot', 'sx130'),\n",
       " ('remote', 'control', 'helicopter'),\n",
       " ('canon', 'powershot', 'sx230'),\n",
       " ('the', 'vampire', 'diaries'),\n",
       " ('caribbean', 'stranger', 'tides'),\n",
       " ('canon', 'powershot', 'sx40'),\n",
       " ('canon', 'powershot', 'g12'),\n",
       " ('wars', 'bly', 'ray'),\n",
       " ('vampire', 'diaries', 'season'),\n",
       " ('always', 'sunny', 'philidelphia'),\n",
       " ('always', 'sunny', 'philedelphia'),\n",
       " ('black', 'ops', 'pre'),\n",
       " ('horrible', 'bosses', 'blu'),\n",
       " ('black', 'ops', 'controller'),\n",
       " ('star', 'wars', 'blu', 'ray'),\n",
       " ('lion', 'king', 'blu', 'ray'),\n",
       " ('external', 'hard', 'drive', 'enclosure'),\n",
       " ('star', 'wars', 'blue', 'ray'),\n",
       " ('captain', 'america', 'blu', 'ray'),\n",
       " ('first', 'class', 'blu', 'ray'),\n",
       " ('wireless', 'external', 'hard', 'drive'),\n",
       " ('1tb', 'external', 'hard', 'drive'),\n",
       " ('seagate', 'external', 'hard', 'drive'),\n",
       " ('2tb', 'external', 'hard', 'drive'),\n",
       " ('mac', 'external', 'hard', 'drive'),\n",
       " ('external', 'hard', 'drive', 'mac'),\n",
       " ('external', 'hard', 'drive', 'case'),\n",
       " ('portable', 'external', 'hard', 'drive'),\n",
       " ('digital', 'external', 'hard', 'drive'),\n",
       " ('toshiba', 'external', 'hard', 'drive'),\n",
       " ('external', 'hard', 'drive', 'for'),\n",
       " ('3tb', 'external', 'hard', 'drive'),\n",
       " ('external', 'hard', 'drive', '2tb'),\n",
       " ('external', 'hard', 'drive', 'firewire'),\n",
       " ('external', 'hard', 'drive', 'western'),\n",
       " ('green', 'lantern', 'blu', 'ray'),\n",
       " ('blu', 'ray', 'dvd', 'player'),\n",
       " ('fast', 'five', 'blu', 'ray'),\n",
       " ('blu', 'ray', 'home', 'theater'),\n",
       " ('lap', 'top', 'hard', 'drive'),\n",
       " ('harry', 'potter', 'blu', 'ray'),\n",
       " ('western', 'digital', 'hard', 'drive'),\n",
       " ('hard', 'drive', 'western', 'digital'),\n",
       " ('blu', 'ray', 'surround', 'sound'),\n",
       " ('family', 'guy', 'star', 'wars'),\n",
       " ('star', 'wars', 'bly', 'ray'),\n",
       " ('internal', 'hard', 'drive', 'laptop'),\n",
       " ('seagate', 'internal', 'hard', 'drive'),\n",
       " ('internal', 'hard', 'drive', 'desktop'),\n",
       " ('sata', 'hard', 'drive', 'enclosure'),\n",
       " ('star', 'wars', 'old', 'republic'),\n",
       " ('star', 'wars', 'complete', 'saga'),\n",
       " ('external', 'usb', 'hard', 'drive'),\n",
       " ('usb', 'hard', 'drive', 'adapter'),\n",
       " ('seagate', 'portable', 'hard', 'drive'),\n",
       " ('external', 'portable', 'hard', 'drive'),\n",
       " ('star', 'wars', 'clone', 'wars'),\n",
       " ('jurassic', 'park', 'blu', 'ray'),\n",
       " ('star', 'wars', 'the', 'old'),\n",
       " ('xbox', 'slim', 'hard', 'drive'),\n",
       " ('solid', 'state', 'hard', 'drive'),\n",
       " ('star', 'wars', 'trilogy', 'dvd'),\n",
       " ('star', 'wars', 'the', 'clone'),\n",
       " ('star', 'wars', 'box', 'set'),\n",
       " ('blu', 'ray', 'dvd', 'players'),\n",
       " ('samsung', 'galaxy', 'tab', 'case'),\n",
       " ('star', 'wars', 'the', 'complete'),\n",
       " ('internal', 'laptop', 'hard', 'drive'),\n",
       " ('star', 'wars', 'the', 'force'),\n",
       " ('samsung', 'galaxy', 'tab', 'accessories'),\n",
       " ('samsung', 'galaxy', 'tab', 'cases'),\n",
       " ('hard', 'drive', 'for', 'mac'),\n",
       " ('the', 'moon', 'blu', 'ray'),\n",
       " ('the', 'rings', 'blu', 'ray'),\n",
       " ('walking', 'dead', 'blu', 'ray'),\n",
       " ('samsung', 'blu', 'ray', 'player'),\n",
       " ('sony', 'blu', 'ray', 'player'),\n",
       " ('horrible', 'bosses', 'blu', 'ray'),\n",
       " ('panasonic', 'blu', 'ray', 'player'),\n",
       " ('portable', 'blu', 'ray', 'player'),\n",
       " ('pulp', 'fiction', 'blu', 'ray'),\n",
       " ('elm', 'street', 'blu', 'ray'),\n",
       " ('the', 'pooh', 'blu', 'ray'),\n",
       " ('samsung', 'blu', 'ray', 'players'),\n",
       " ('the', 'crow', 'blu', 'ray'),\n",
       " ('external', 'blu', 'ray', 'burner'),\n",
       " ('external', 'blu', 'ray', 'drive'),\n",
       " ('ipod', 'touch', 'screen', 'protector'),\n",
       " ('lion', 'king', 'blue', 'ray'),\n",
       " ('ipod', 'touch', '4th', 'generation'),\n",
       " ('ipod', 'touch', '3rd', 'generation'),\n",
       " ('4th', 'generation', 'ipod', 'touch'),\n",
       " ('ipod', 'touch', '5th', 'generation'),\n",
       " ('ipod', 'touch', '2nd', 'generation'),\n",
       " ('ipod', 'touch', 'arm', 'band'),\n",
       " ('ipod', 'touch', '4th', 'gen'),\n",
       " ('3rd', 'generation', 'ipod', 'touch'),\n",
       " ('ipod', 'touch', '3rd', 'gen'),\n",
       " ('ipod', 'touch', 'hard', 'case'),\n",
       " ('ipod', 'touch', 'with', 'camera'),\n",
       " ('ipod', 'touch', 'wall', 'charger'),\n",
       " ('cases', 'for', 'ipod', 'touch'),\n",
       " ('otterbox', 'for', 'ipod', 'touch'),\n",
       " ('the', 'lion', 'king', 'dvd'),\n",
       " ('the', 'lion', 'king', 'blu'),\n",
       " ('the', 'lion', 'king', 'movie'),\n",
       " ('sony', 'portable', 'dvd', 'player'),\n",
       " ('dual', 'portable', 'dvd', 'player'),\n",
       " ('screen', 'portable', 'dvd', 'player'),\n",
       " ('captain', 'america', 'blue', 'ray'),\n",
       " ('first', 'class', 'blue', 'ray'),\n",
       " ('call', 'duty', 'modern', 'warfare'),\n",
       " ('lil', 'wayne', 'tha', 'carter'),\n",
       " ('green', 'lantern', 'blue', 'ray'),\n",
       " ('lion', 'king', 'diamond', 'edition'),\n",
       " ('men', 'first', 'class', 'blu'),\n",
       " ('blue', 'ray', 'dvd', 'player'),\n",
       " ('men', 'first', 'class', 'blue'),\n",
       " ('lil', 'wayne', 'the', 'carter'),\n",
       " ('virgin', 'mobile', 'cell', 'phones'),\n",
       " ('fast', 'five', 'blue', 'ray'),\n",
       " ('xmen', 'first', 'class', 'blu'),\n",
       " ('blue', 'ray', 'home', 'theater'),\n",
       " ('hello', 'kitty', 'iphone', 'case'),\n",
       " ('samsung', 'galaxy', 'tablet', 'accessories'),\n",
       " ('turtle', 'beach', 'head', 'set'),\n",
       " ('virgin', 'mobile', 'motorola', 'triumph'),\n",
       " ('turtle', 'beach', 'for', 'ps3'),\n",
       " ('batman', 'arkham', 'city', 'collectors'),\n",
       " ('batman', 'arkham', 'city', 'ps3'),\n",
       " ('blue', 'ray', 'surround', 'sound'),\n",
       " ('western', 'digital', 'external', 'hard'),\n",
       " ('captain', 'america', 'the', 'first'),\n",
       " ('call', 'duty', 'black', 'ops'),\n",
       " ('transformers', 'dark', 'the', 'moon'),\n",
       " ('skull', 'candy', 'head', 'phones'),\n",
       " ('portable', 'car', 'dvd', 'player'),\n",
       " ('apple', 'mac', 'book', 'pro'),\n",
       " ('the', 'big', 'bang', 'theory'),\n",
       " ('big', 'bang', 'theory', 'season'),\n",
       " ('flat', 'screen', 'wall', 'mount'),\n",
       " ('gears', 'war', 'limited', 'edition'),\n",
       " ('gears', 'war', 'season', 'pass'),\n",
       " ('dvd', 'player', 'for', 'car'),\n",
       " ('with', 'built', 'dvd', 'player'),\n",
       " ('multi', 'region', 'dvd', 'player'),\n",
       " ('jurassic', 'park', 'blue', 'ray'),\n",
       " ('mac', 'book', 'pro', 'case'),\n",
       " ('dual', 'screen', 'dvd', 'player'),\n",
       " ('mac', 'book', 'pro', 'charger'),\n",
       " ('mac', 'book', 'pro', 'battery'),\n",
       " ('sony', 'home', 'theater', 'system'),\n",
       " ('pirates', 'the', 'caribbean', 'stranger'),\n",
       " ('watch', 'the', 'throne', 'deluxe'),\n",
       " ('blue', 'ray', 'dvd', 'players'),\n",
       " ('guitar', 'hero', 'warriors', 'rock'),\n",
       " ('blue', 'tooth', 'head', 'phones'),\n",
       " ('pirates', 'the', 'caribbean', 'dvd'),\n",
       " ('guitar', 'hero', 'world', 'tour'),\n",
       " ('bose', 'home', 'theater', 'system'),\n",
       " ('apple', 'mac', 'book', 'air'),\n",
       " ('harry', 'potter', 'and', 'the'),\n",
       " ('samsung', 'home', 'theater', 'system'),\n",
       " ('wireless', 'home', 'theater', 'system'),\n",
       " ('panasonic', 'home', 'theater', 'system'),\n",
       " ('phillips', 'home', 'theater', 'system'),\n",
       " ('duty', 'modern', 'warfare', 'hardened'),\n",
       " ('the', 'rings', 'blue', 'ray'),\n",
       " ('memory', 'stick', 'pro', 'duo'),\n",
       " ('call', 'duty', 'world', 'war'),\n",
       " ('modern', 'warfare', 'hardened', 'edition'),\n",
       " ('microsoft', 'office', 'for', 'mac'),\n",
       " ('hello', 'kitty', 'ipod', 'cases'),\n",
       " ('the', 'lord', 'the', 'rings'),\n",
       " ('hello', 'kitty', 'laptop', 'sleeve'),\n",
       " ('lord', 'the', 'rings', 'blu'),\n",
       " ('lord', 'the', 'rings', 'trilogy'),\n",
       " ('lord', 'the', 'rings', 'extended'),\n",
       " ('lord', 'the', 'rings', 'bluray'),\n",
       " ('lord', 'the', 'rings', 'blue'),\n",
       " ('harry', 'potter', 'deathly', 'hallows'),\n",
       " ('lord', 'the', 'rings', 'war'),\n",
       " ('samsung', 'blue', 'ray', 'player'),\n",
       " ('sony', 'blue', 'ray', 'player'),\n",
       " ('hello', 'kitty', 'laptop', 'case'),\n",
       " ('hello', 'kitty', 'ipod', 'case'),\n",
       " ('hello', 'kitty', 'phone', 'case'),\n",
       " ('htc', 'evo', 'hdmi', 'cable'),\n",
       " ('sony', 'vaio', 'laptop', 'charger'),\n",
       " ('boost', 'mobile', 'cell', 'phone'),\n",
       " ('how', 'met', 'your', 'mother'),\n",
       " ('arkham', 'city', 'collectors', 'edition'),\n",
       " ('dark', 'the', 'moon', 'dvd'),\n",
       " ('transformer', 'dark', 'the', 'moon'),\n",
       " ('dark', 'the', 'moon', 'blu'),\n",
       " ('dark', 'the', 'moon', 'movie'),\n",
       " ('harry', 'potter', 'box', 'set'),\n",
       " ('dual', 'screen', 'portable', 'dvd'),\n",
       " ('harry', 'potter', 'ultimate', 'edition'),\n",
       " ('touch', 'screen', 'car', 'stereo'),\n",
       " ('car', 'stereo', 'touch', 'screen'),\n",
       " ('full', 'motion', 'wall', 'mount'),\n",
       " ('polk', 'audio', 'sound', 'bar'),\n",
       " ('wireless', 'surround', 'sound', 'system'),\n",
       " ('grand', 'theft', 'auto', 'san'),\n",
       " ('dual', 'band', 'wireless', 'router'),\n",
       " ('red', 'hot', 'chili', 'peppers'),\n",
       " ('car', 'stereo', 'installation', 'kit'),\n",
       " ('wireless', 'surround', 'sound', 'speakers'),\n",
       " ('need', 'for', 'speed', 'most'),\n",
       " ('just', 'dance', 'summer', 'party'),\n",
       " ('need', 'for', 'speed', 'carbon'),\n",
       " ('just', 'dance', 'for', 'wii'),\n",
       " ('car', 'stereo', 'with', 'bluetooth'),\n",
       " ('need', 'for', 'speed', 'undercover'),\n",
       " ('need', 'for', 'speed', 'the'),\n",
       " ('need', 'for', 'speed', 'hot'),\n",
       " ('five', 'finger', 'death', 'punch'),\n",
       " ('pirates', 'the', 'carribean', 'stranger'),\n",
       " ('samsung', 'blue', 'ray', 'dvd'),\n",
       " ('ipod', 'nano', '5th', 'generation'),\n",
       " ('cell', 'phone', 'signal', 'booster'),\n",
       " ('water', 'proof', 'digital', 'camera'),\n",
       " ('toshiba', 'laptop', 'power', 'cord'),\n",
       " ('ipod', 'nano', '6th', 'generation'),\n",
       " ('touch', 'screen', 'digital', 'camera'),\n",
       " ('the', 'caribbean', 'stranger', 'tides'),\n",
       " ('sex', 'and', 'the', 'city'),\n",
       " ('toshiba', 'thrive', 'screen', 'protector'),\n",
       " ('blue', 'tooth', 'head', 'set'),\n",
       " ('sony', 'home', 'theatre', 'system'),\n",
       " ('dell', 'laptop', 'power', 'cord'),\n",
       " ('blue', 'tooth', 'head', 'sets'),\n",
       " ('how', 'meet', 'your', 'mother'),\n",
       " ('one', 'tree', 'hill', 'season'),\n",
       " ('all', 'one', 'desktop', 'computers'),\n",
       " ('dell', 'inspiron', 'power', 'cord'),\n",
       " ('htc', 'evo', 'shift', 'case'),\n",
       " ('its', 'always', 'sunny', 'philadelphia'),\n",
       " ('htc', 'evo', 'shift', 'phone'),\n",
       " ('god', 'war', 'origins', 'collection'),\n",
       " ('met', 'your', 'mother', 'season'),\n",
       " ('gateway', 'all', 'one', 'computer'),\n",
       " ('samsung', 'all', 'one', 'computer'),\n",
       " ('htc', 'evo', 'phone', 'cases'),\n",
       " ('western', 'digital', 'media', 'player'),\n",
       " ('cell', 'phone', 'signal', 'boosters'),\n",
       " ('htc', 'evo', 'extended', 'battery'),\n",
       " ('htc', 'evo', 'phone', 'case'),\n",
       " ('red', 'hot', 'chilli', 'peppers'),\n",
       " ('its', 'always', 'sunny', 'philidelphia'),\n",
       " ('its', 'always', 'sunny', 'philedelphia'),\n",
       " ('hdmi', 'cable', 'for', 'xbox'),\n",
       " ('high', 'speed', 'hdmi', 'cable'),\n",
       " ('cell', 'phone', 'car', 'mount'),\n",
       " ('verizon', 'wireless', 'cell', 'phones'),\n",
       " ('dark', 'side', 'the', 'moon'),\n",
       " ('pink', 'floyd', 'dark', 'side'),\n",
       " ('transformers', 'dark', 'side', 'the'),\n",
       " ('touch', 'screen', 'car', 'audio'),\n",
       " ('dance', 'dance', 'revolution', 'ps3'),\n",
       " ('dance', 'dance', 'revolution', 'wii'),\n",
       " ('dragon', 'ball', 'ultimate', 'tenkaichi'),\n",
       " ('how', 'train', 'your', 'dragon'),\n",
       " ('acer', 'iconia', 'tab', 'a500'),\n",
       " ('pink', 'floyd', 'the', 'wall'),\n",
       " ('western', 'digital', 'book', 'live'),\n",
       " ('the', 'nightmare', 'before', 'christmas'),\n",
       " ('iphone', '3gs', 'screen', 'protector'),\n",
       " ('touch', 'screen', 'car', 'stereos'),\n",
       " ('xbox', 'live', 'gold', 'membership'),\n",
       " ('the', 'walking', 'dead', 'blu'),\n",
       " ('the', 'carribean', 'stranger', 'tides'),\n",
       " ('dvd', 'players', 'for', 'cars'),\n",
       " ('theft', 'auto', 'san', 'andreas'),\n",
       " ('water', 'proof', 'digital', 'cameras'),\n",
       " ('jay', 'and', 'kanye', 'west'),\n",
       " ('and', 'the', 'deathly', 'hallows'),\n",
       " ('for', 'speed', 'most', 'wanted'),\n",
       " ('asus', 'eee', 'pad', 'transformer'),\n",
       " ('touch', 'screen', 'digital', 'cameras'),\n",
       " ('high', 'school', 'the', 'dead'),\n",
       " ('for', 'speed', 'the', 'run'),\n",
       " ('the', 'rings', 'war', 'the'),\n",
       " ('microsoft', 'word', 'for', 'mac'),\n",
       " ('fast', 'and', 'the', 'furious'),\n",
       " ('for', 'speed', 'hot', 'pursuit'),\n",
       " ('htc', 'inspire', 'phone', 'cases'),\n",
       " ('spider', 'man', 'edge', 'time'),\n",
       " ('htc', 'inspire', 'phone', 'case'),\n",
       " ('beauty', 'and', 'the', 'beast'),\n",
       " ('potter', 'and', 'the', 'deathly'),\n",
       " ('wireless', 'keyboard', 'and', 'mouse'),\n",
       " ('black', 'ops', 'pre', 'owned'),\n",
       " ('cases', 'for', 'htc', 'inspire'),\n",
       " ('the', 'fast', 'and', 'the'),\n",
       " ('willy', 'wonka', 'and', 'the'),\n",
       " ('winnie', 'the', 'pooh', 'blu'),\n",
       " ('sony', 'camera', 'battery', 'charger'),\n",
       " ('fox', 'and', 'the', 'hound'),\n",
       " ('legend', 'zelda', 'skyward', 'sword'),\n",
       " ('over', 'the', 'ear', 'headphones'),\n",
       " ('the', 'devil', 'wears', 'prada'),\n",
       " ('the', 'fox', 'and', 'the'),\n",
       " ('netgear', 'wireless', 'usb', 'adapter'),\n",
       " ('touch', '4th', 'generation', 'case'),\n",
       " ('the', 'deathly', 'hallows', 'part'),\n",
       " ('touch', '4th', 'generation', 'cases'),\n",
       " ('the', 'fast', 'and', 'furious'),\n",
       " ('touch', '4th', 'generation', 'white'),\n",
       " ('the', 'planet', 'the', 'apes'),\n",
       " ('the', 'secret', 'life', 'the'),\n",
       " ('sony', 'cyber', 'shot', 'camera'),\n",
       " ('smokey', 'and', 'the', 'bandit'),\n",
       " ('the', 'head', 'and', 'the'),\n",
       " ('and', 'the', 'chocolate', 'factory'),\n",
       " ('wars', 'the', 'old', 'republic'),\n",
       " ('wonka', 'and', 'the', 'chocolate'),\n",
       " ('alvin', 'and', 'the', 'chipmunks'),\n",
       " ('jem', 'and', 'the', 'holograms'),\n",
       " ('live', 'free', 'die', 'hard'),\n",
       " ('florence', 'and', 'the', 'machine'),\n",
       " ('sony', 'cyber', 'shot', 'wx9'),\n",
       " ('canon', 'eos', 'rebel', 't3i'),\n",
       " ('princess', 'and', 'the', 'frog'),\n",
       " ('head', 'and', 'the', 'heart'),\n",
       " ('canon', 'eos', 'rebel', 't2i'),\n",
       " ('starwars', 'the', 'old', 'republic'),\n",
       " ('nightmare', 'elm', 'street', 'blu'),\n",
       " ('sony', 'over', 'the', 'ear'),\n",
       " ('headphones', 'over', 'the', 'ear'),\n",
       " ('jeff', 'dunham', 'controlled', 'chaos'),\n",
       " ('the', 'hills', 'have', 'eyes'),\n",
       " ('stackable', 'washer', 'and', 'dryer'),\n",
       " ('over', 'the', 'range', 'microwave'),\n",
       " ('over', 'the', 'ear', 'headphone'),\n",
       " ('ace', 'combat', 'assault', 'horizon'),\n",
       " ('wireless', 'mouse', 'and', 'keyboard'),\n",
       " ('digital', 'optical', 'audio', 'cable'),\n",
       " ('usb', 'wireless', 'lan', 'adapter'),\n",
       " ('kitchen', 'aid', 'stand', 'mixer'),\n",
       " ('washer', 'and', 'dryer', 'combo'),\n",
       " ('pci', 'express', 'video', 'card'),\n",
       " ('samsung', 'french', 'door', 'refrigerator'),\n",
       " ('transformers', 'revenge', 'the', 'fallen'),\n",
       " ('usb', 'wireless', 'network', 'adapter'),\n",
       " ('two', 'and', 'half', 'men'),\n",
       " ('mophie', 'juice', 'pack', 'plus'),\n",
       " ('microwave', 'over', 'the', 'range'),\n",
       " ('over', 'the', 'range', 'microwaves'),\n",
       " ('mini', 'display', 'port', 'hdmi'),\n",
       " ('touch', '3rd', 'generation', 'case'),\n",
       " ('mophie', 'juice', 'pack', 'air'),\n",
       " ('optical', 'digital', 'audio', 'cable'),\n",
       " ('taylor', 'swift', 'speak', 'now'),\n",
       " ('samsung', 'wireless', 'lan', 'adapter'),\n",
       " ('evo', 'shift', 'phone', 'case'),\n",
       " ('and', 'shadow', 'the', 'colossus'),\n",
       " ('law', 'and', 'order', 'svu'),\n",
       " ('saints', 'row', 'the', 'third'),\n",
       " ('bluetooth', 'keyboard', 'and', 'mouse'),\n",
       " ('teenage', 'mutant', 'ninja', 'turtles'),\n",
       " ('sony', 'network', 'media', 'player'),\n",
       " ('sony', 'over', 'ear', 'headphones'),\n",
       " ('wars', 'the', 'clone', 'wars'),\n",
       " ('the', 'legend', 'zelda', 'skyward'),\n",
       " ('floyd', 'dark', 'side', 'the'),\n",
       " ('rocky', 'horror', 'picture', 'show'),\n",
       " ('alice', 'cooper', 'welcome', 'nightmare'),\n",
       " ('cole', 'world', 'the', 'sideline'),\n",
       " ('head', 'set', 'for', 'ps3'),\n",
       " ('madeas', 'big', 'happy', 'family'),\n",
       " ('car', 'charger', 'for', 'laptop'),\n",
       " ('night', 'the', 'living', 'dead'),\n",
       " ('the', 'phantom', 'the', 'opera'),\n",
       " ('black', 'berry', 'play', 'book'),\n",
       " ('sony', 'ericsson', 'xperia', 'play'),\n",
       " ('skullcandy', 'full', 'metal', 'jacket'),\n",
       " ('life', 'the', 'american', 'teenager'),\n",
       " ('scooby', 'doo', 'where', 'are'),\n",
       " ('tori', 'amos', 'night', 'hunters'),\n",
       " ('nikon', 'cool', 'pix', 'l120'),\n",
       " ('return', 'the', 'living', 'dead'),\n",
       " ('wars', 'the', 'complete', 'saga'),\n",
       " ('east', 'bound', 'and', 'down'),\n",
       " ('secret', 'life', 'the', 'american'),\n",
       " ('gone', 'with', 'the', 'wind'),\n",
       " ('xbox', 'data', 'transfer', 'cable'),\n",
       " ('hands', 'free', 'car', 'kit'),\n",
       " ('madea', 'big', 'happy', 'family'),\n",
       " ('new', 'kids', 'the', 'block'),\n",
       " ('phone', 'cases', 'for', 'htc'),\n",
       " ('wireless', 'card', 'for', 'desktop'),\n",
       " ('ico', 'and', 'shadow', 'the'),\n",
       " ('peter', 'gabriel', 'new', 'blood'),\n",
       " ('rise', 'the', 'planet', 'the'),\n",
       " ('nothing', 'but', 'the', 'beat'),\n",
       " ('spartacus', 'gods', 'the', 'arena'),\n",
       " ('big', 'sean', 'finally', 'famous'),\n",
       " ('dark', 'night', 'the', 'scarecrow'),\n",
       " ('xbox', 'wireless', 'networking', 'adapter'),\n",
       " ('batman', 'the', 'animated', 'series'),\n",
       " ('heroes', 'might', 'and', 'magic'),\n",
       " ('house', 'the', 'dead', 'overkill'),\n",
       " ('fear', 'and', 'loathing', 'las'),\n",
       " ('wwe', 'money', 'the', 'bank'),\n",
       " ('america', 'the', 'first', 'avenger'),\n",
       " ('sony', 'extra', 'bass', 'headphones'),\n",
       " ('brother', 'where', 'art', 'thou'),\n",
       " ('last', 'house', 'the', 'left'),\n",
       " ('tales', 'from', 'the', 'crypt'),\n",
       " ('who', 'framed', 'roger', 'rabbit'),\n",
       " ('buffy', 'the', 'vampire', 'slayer'),\n",
       " ('keeping', 'with', 'the', 'kardashians'),\n",
       " ('laugh', 'keep', 'from', 'crying'),\n",
       " ('what', 'dreams', 'may', 'come'),\n",
       " ('the', 'last', 'house', 'the'),\n",
       " ('stray', 'from', 'the', 'path'),\n",
       " ('planes', 'trains', 'and', 'automobiles'),\n",
       " ('ipod', 'adapter', 'for', 'car'),\n",
       " ('avatar', 'the', 'last', 'airbender'),\n",
       " ('two', 'door', 'cinema', 'club'),\n",
       " ('and', 'loathing', 'las', 'vegas'),\n",
       " ('the', 'night', 'the', 'sun'),\n",
       " ('dance', 'the', 'vampire', 'bund'),\n",
       " ('aqua', 'teen', 'hunger', 'force'),\n",
       " ('rage', 'against', 'the', 'machine'),\n",
       " ('the', 'thin', 'red', 'line'),\n",
       " ('country', 'for', 'old', 'men'),\n",
       " ('world', 'the', 'sideline', 'story'),\n",
       " ('the', 'count', 'monte', 'cristo'),\n",
       " ('wicked', 'this', 'way', 'comes'),\n",
       " ('cabelas', 'big', 'game', 'hunter'),\n",
       " ('super', 'smash', 'bros', 'brawl'),\n",
       " ('scott', 'pilgrim', 'the', 'world'),\n",
       " ('get', 'him', 'the', 'greek'),\n",
       " ('office', 'home', 'and', 'student'),\n",
       " ('night', 'the', 'sun', 'came'),\n",
       " ('something', 'wicked', 'this', 'way'),\n",
       " ('charred', 'walls', 'the', 'damned'),\n",
       " ('death', 'cab', 'for', 'cutie'),\n",
       " ('microsoft', 'home', 'and', 'student'),\n",
       " ('breville', 'juice', 'fountain', 'plus'),\n",
       " ('save', 'the', 'last', 'dance'),\n",
       " ('not', 'that', 'into', 'you'),\n",
       " ('hes', 'just', 'not', 'that'),\n",
       " ('music', 'better', 'than', 'words'),\n",
       " ('when', 'fish', 'ride', 'bicycles'),\n",
       " ('ipad', 'camera', 'connection', 'kit'),\n",
       " ('fresh', 'prince', 'bel', 'air'),\n",
       " ('doo', 'where', 'are', 'you'),\n",
       " ('hot', 'tub', 'time', 'machine'),\n",
       " ('clap', 'your', 'hands', 'say'),\n",
       " ('your', 'hands', 'say', 'yeah'),\n",
       " ('just', 'not', 'that', 'into'),\n",
       " ('why', 'did', 'get', 'married'),\n",
       " ('fast', 'times', 'ridgemont', 'high'),\n",
       " ('the', 'red', 'jumpsuit', 'apparatus'),\n",
       " ('once', 'upon', 'time', 'the'),\n",
       " ('wars', 'the', 'force', 'unleashed'),\n",
       " ('beautiful', 'dark', 'twisted', 'fantasy'),\n",
       " ('people', 'under', 'the', 'stairs'),\n",
       " ('soulja', 'boy', 'the', 'movie'),\n",
       " ('rings', 'war', 'the', 'north'),\n",
       " ('digital', 'analog', 'audio', 'converter'),\n",
       " ('let', 'the', 'right', 'one'),\n",
       " ('upon', 'time', 'the', 'west'),\n",
       " ('queens', 'the', 'stone', 'age'),\n",
       " ('seven', 'brides', 'for', 'seven'),\n",
       " ('brides', 'for', 'seven', 'brothers'),\n",
       " ('play', 'and', 'charge', 'kit')]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "intersection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5: Further filter bi-grams to get noun phrases\n",
    "Filter based on the POS tagging patterns JJ_NN or NN_NN."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect distinct two-word noun phrases, preserving first-seen order.\n",
    "# The displayed `intersection` contains n-grams longer than two tokens;\n",
    "# only the two leading tokens of each entry are inspected, so several\n",
    "# n-grams can yield the same phrase -- hence the de-duplication below.\n",
    "# NOTE(review): leading pairs of longer n-grams can be truncated fragments;\n",
    "# consider restricting to two-token entries -- confirm intended behavior.\n",
    "bi_gram_noun_phrases = []\n",
    "seen_phrases = set()\n",
    "for phrase_token in intersection:\n",
    "    if len(phrase_token) < 2:\n",
    "        continue  # a lone token cannot form a bi-gram\n",
    "    # Tag the whole n-gram, as before, then look at its first two tokens.\n",
    "    tagged = nltk.pos_tag(phrase_token)\n",
    "    (first_word, first_tag), (second_word, second_tag) = tagged[0], tagged[1]\n",
    "    # Keep only the JJ_NN and NN_NN patterns.\n",
    "    if first_tag in ('NN', 'JJ') and second_tag == 'NN':\n",
    "        phrase = ' '.join([first_word, second_word])\n",
    "        if phrase not in seen_phrases:\n",
    "            seen_phrases.add(phrase)\n",
    "            bi_gram_noun_phrases.append(phrase)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['puddle mudd',\n",
       " 'tim mcgraw',\n",
       " 'chemical romance',\n",
       " 'avril lavigne',\n",
       " 'carrie underwood',\n",
       " 'velvet revolver',\n",
       " 'bobby valentino',\n",
       " 'brad paisley',\n",
       " 'boardwalk empire',\n",
       " 'brantley gilbert',\n",
       " 'double din',\n",
       " 'mortal kombat',\n",
       " 'skyward sword',\n",
       " 'pulp fiction',\n",
       " 'jeff dunham',\n",
       " 'cyber shot',\n",
       " 'kitchen aid',\n",
       " 'david guetta',\n",
       " 'ace combat',\n",
       " 'noise canceling',\n",
       " 'french door',\n",
       " 'taylor swift',\n",
       " 'heart rate',\n",
       " 'mass effect',\n",
       " 'kung panda',\n",
       " 'hocus pocus',\n",
       " 'mindless behavior',\n",
       " 'paranormal activity',\n",
       " 'gossip girl',\n",
       " 'ncaa football',\n",
       " 'sunny philadelphia',\n",
       " 'randy orton',\n",
       " 'rosetta stone',\n",
       " 'rick ross',\n",
       " 'professor layton',\n",
       " 'jill scott',\n",
       " 'elm street',\n",
       " 'snow leopard',\n",
       " 'noise cancelling',\n",
       " 'leap frog',\n",
       " 'flight simulator',\n",
       " 'demi lovato',\n",
       " 'ben hur',\n",
       " 'gran turismo',\n",
       " 'thermal paste',\n",
       " 'display port',\n",
       " 'alice cooper',\n",
       " 'mylo xyloto',\n",
       " 'juice pack',\n",
       " 'complete saga',\n",
       " 'toy story',\n",
       " 'tech n9ne',\n",
       " 'marvel capcom',\n",
       " 'jane eyre',\n",
       " 'jersey shore',\n",
       " 'street fighter',\n",
       " 'scooby doo',\n",
       " 'rice cooker',\n",
       " 'rockford fosgate',\n",
       " 'alien ware',\n",
       " 'cool pix',\n",
       " 'monte carlo',\n",
       " 'jake owen',\n",
       " 'deep fryer',\n",
       " 'little mermaid',\n",
       " 'driver san',\n",
       " 'definitive technology',\n",
       " 'battle field',\n",
       " 'tyler perry',\n",
       " 'american capitalist',\n",
       " 'luke bryan',\n",
       " 'selena gomez',\n",
       " 'burn notice',\n",
       " 'universe online',\n",
       " 'toby keith',\n",
       " 'cobra starship',\n",
       " 'butch walker',\n",
       " 'transform ultra',\n",
       " 'fullmetal alchemist',\n",
       " 'kid cudi',\n",
       " 'jimi hendrix',\n",
       " 'san francisco',\n",
       " 'wiz khalifa',\n",
       " 'love lucy',\n",
       " 'wrong turn',\n",
       " 'johnny gill',\n",
       " 'sansa clip',\n",
       " 'gucci mane',\n",
       " 'serbian film',\n",
       " 'sleepy hollow',\n",
       " 'soda stream',\n",
       " 'george foreman',\n",
       " 'jason aldean',\n",
       " 'katy perry',\n",
       " 'lupe fiasco',\n",
       " 'good wife',\n",
       " 'rocket fish',\n",
       " 'crazy stupid',\n",
       " 'eric church',\n",
       " 'gavin degraw',\n",
       " 'lion king',\n",
       " 'lil wayne',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'big bang',\n",
       " 'guitar hero',\n",
       " 'home theater',\n",
       " 'call duty',\n",
       " 'home theater',\n",
       " 'digital picture',\n",
       " 'modern warfare',\n",
       " 'home theater',\n",
       " 'arkham city',\n",
       " 'microsoft office',\n",
       " 'harry potter',\n",
       " 'lego harry',\n",
       " 'harry potter',\n",
       " 'harry potter',\n",
       " 'harry potter',\n",
       " 'harry potter',\n",
       " 'harry potter',\n",
       " 'skull candy',\n",
       " 'otter box',\n",
       " 'skull candy',\n",
       " 'skull candy',\n",
       " 'otter box',\n",
       " 'skull candy',\n",
       " 'atx power',\n",
       " 'grand theft',\n",
       " 'corsair power',\n",
       " 'wall mount',\n",
       " 'power supply',\n",
       " 'motion wall',\n",
       " 'memory stick',\n",
       " 'surround sound',\n",
       " 'portable air',\n",
       " 'digital picture',\n",
       " 'ipod nano',\n",
       " 'high school',\n",
       " 'big bang',\n",
       " 'bang theory',\n",
       " 'window air',\n",
       " 'digital photo',\n",
       " 'htc evo',\n",
       " 'god war',\n",
       " 'cell phone',\n",
       " 'hot chili',\n",
       " 'god war',\n",
       " 'finger death',\n",
       " 'cell phone',\n",
       " 'htc evo',\n",
       " 'cobra radar',\n",
       " 'contract cell',\n",
       " 'electronic picture',\n",
       " 'cell phone',\n",
       " 'pearl jam',\n",
       " 'dragon ball',\n",
       " 'dance dance',\n",
       " 'jurassic park',\n",
       " 'jurassic park',\n",
       " 'acer iconia',\n",
       " 'dragon ball',\n",
       " 'flat screen',\n",
       " 'home theatre',\n",
       " 'acer iconia',\n",
       " 'pink floyd',\n",
       " 'pink floyd',\n",
       " 'potable dvd',\n",
       " 'rock band',\n",
       " 'theft auto',\n",
       " 'credit card',\n",
       " 'final fantasy',\n",
       " 'square card',\n",
       " 'lady gaga',\n",
       " 'jay kanye',\n",
       " 'tom tom',\n",
       " 'final fantasy',\n",
       " 'batman year',\n",
       " 'family guy',\n",
       " 'tree hill',\n",
       " 'high school',\n",
       " 'sharp aquos',\n",
       " 'canon powershot',\n",
       " 'canon powershot',\n",
       " 'canon powershot',\n",
       " 'spider man',\n",
       " 'canon powershot',\n",
       " 'canon powershot',\n",
       " 'caribbean stranger',\n",
       " 'canon powershot',\n",
       " 'canon powershot',\n",
       " 'captain america',\n",
       " 'blu ray',\n",
       " 'blu ray',\n",
       " 'harry potter',\n",
       " 'hard drive',\n",
       " 'blu ray',\n",
       " 'family guy',\n",
       " 'jurassic park',\n",
       " 'solid state',\n",
       " 'blu ray',\n",
       " 'samsung galaxy',\n",
       " 'samsung galaxy',\n",
       " 'samsung galaxy',\n",
       " 'hard drive',\n",
       " 'samsung blu',\n",
       " 'sony blu',\n",
       " 'panasonic blu',\n",
       " 'portable blu',\n",
       " 'pulp fiction',\n",
       " 'elm street',\n",
       " 'samsung blu',\n",
       " 'external blu',\n",
       " 'external blu',\n",
       " 'ipod touch',\n",
       " 'sony portable',\n",
       " 'dual portable',\n",
       " 'captain america',\n",
       " 'blue ray',\n",
       " 'virgin mobile',\n",
       " 'blue ray',\n",
       " 'samsung galaxy',\n",
       " 'turtle beach',\n",
       " 'turtle beach',\n",
       " 'blue ray',\n",
       " 'call duty',\n",
       " 'skull candy',\n",
       " 'portable car',\n",
       " 'apple mac',\n",
       " 'big bang',\n",
       " 'flat screen',\n",
       " 'dvd player',\n",
       " 'multi region',\n",
       " 'jurassic park',\n",
       " 'dual screen',\n",
       " 'sony home',\n",
       " 'blue ray',\n",
       " 'guitar hero',\n",
       " 'guitar hero',\n",
       " 'apple mac',\n",
       " 'harry potter',\n",
       " 'samsung home',\n",
       " 'wireless home',\n",
       " 'panasonic home',\n",
       " 'modern warfare',\n",
       " 'microsoft office',\n",
       " 'hello kitty',\n",
       " 'harry potter',\n",
       " 'samsung blue',\n",
       " 'sony blue',\n",
       " 'htc evo',\n",
       " 'sony vaio',\n",
       " 'boost mobile',\n",
       " 'arkham city',\n",
       " 'harry potter',\n",
       " 'dual screen',\n",
       " 'harry potter',\n",
       " 'car stereo',\n",
       " 'full motion',\n",
       " 'polk audio',\n",
       " 'wireless surround',\n",
       " 'grand theft',\n",
       " 'dual band',\n",
       " 'car stereo',\n",
       " 'wireless surround',\n",
       " 'car stereo',\n",
       " 'samsung blue',\n",
       " 'cell phone',\n",
       " 'water proof',\n",
       " 'sony home',\n",
       " 'dell inspiron',\n",
       " 'htc evo',\n",
       " 'htc evo',\n",
       " 'god war',\n",
       " 'cell phone',\n",
       " 'htc evo',\n",
       " 'hdmi cable',\n",
       " 'high speed',\n",
       " 'cell phone',\n",
       " 'verizon wireless',\n",
       " 'dark side',\n",
       " 'dance dance',\n",
       " 'dance dance',\n",
       " 'dragon ball',\n",
       " 'acer iconia',\n",
       " 'theft auto',\n",
       " 'water proof',\n",
       " 'asus eee',\n",
       " 'high school',\n",
       " 'microsoft word',\n",
       " 'htc inspire',\n",
       " 'spider man',\n",
       " 'htc inspire',\n",
       " 'wireless keyboard',\n",
       " 'sony camera',\n",
       " 'netgear wireless',\n",
       " 'sony cyber',\n",
       " 'sony cyber',\n",
       " 'jeff dunham',\n",
       " 'stackable washer',\n",
       " 'ace combat',\n",
       " 'wireless mouse',\n",
       " 'usb wireless',\n",
       " 'kitchen aid',\n",
       " 'pci express',\n",
       " 'usb wireless',\n",
       " 'mophie juice',\n",
       " 'mini display',\n",
       " 'mophie juice',\n",
       " 'taylor swift',\n",
       " 'samsung wireless',\n",
       " 'evo shift',\n",
       " 'sony network',\n",
       " 'rocky horror',\n",
       " 'cole world',\n",
       " 'car charger',\n",
       " 'black berry',\n",
       " 'sony ericsson',\n",
       " 'scooby doo',\n",
       " 'nikon cool',\n",
       " 'east bound',\n",
       " 'secret life',\n",
       " 'wireless card',\n",
       " 'big sean',\n",
       " 'dark night',\n",
       " 'xbox wireless',\n",
       " 'wwe money',\n",
       " 'last house',\n",
       " 'laugh keep',\n",
       " 'ipod adapter',\n",
       " 'super smash',\n",
       " 'office home',\n",
       " 'death cab',\n",
       " 'microsoft home',\n",
       " 'breville juice',\n",
       " 'ipad camera',\n",
       " 'fresh prince',\n",
       " 'hot tub',\n",
       " 'beautiful dark',\n",
       " 'digital analog']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bi_gram_noun_phrases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
